from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit

import scrapy

from ..items import BookCrapyItem

class BookSpider(scrapy.Spider):
    """Spider that scrapes book titles from the book.douban.com "latest" listing.

    Crawling starts at page 1 of the listing and continues page by page for
    as long as the current page contains a "next" link.
    """

    name = "book"
    allowed_domains = ["book.douban.com"]
    start_urls = ["http://book.douban.com/latest?subcat=全部&p=1"]

    def parse(self, response):
        """Yield one item per listed book, then a request for the next page.

        Args:
            response: The downloaded listing page.

        Yields:
            A ``BookCrapyItem`` with its ``name`` field set for every book
            entry found on the page (``name`` is ``None`` when the title
            node is missing), followed by a request for the next listing
            page when the page has a "next" link.
        """
        # Full page dump is kept for debugging, but goes through the spider
        # logger so it can be silenced via LOG_LEVEL.
        self.logger.debug(response.text)

        books = response.xpath(
            '//ul[@class="chart-dashed-list"]/li[@class="media clearfix"]'
        )

        for book in books:
            # A fresh item per book: yielding one shared, mutated instance
            # lets later iterations overwrite data that pipelines or
            # exporters still hold a reference to.
            item = BookCrapyItem()
            item['name'] = book.xpath('.//h2[@class="clearfix"]/a/text()').get()
            yield item

        # No "next" link means this is the last page.
        if not response.xpath('//span[@class="next"]/a'):
            return

        self.logger.info('爬取下一页')
        current_url = response.url
        self.logger.info("当前url: %s", current_url)

        next_url = self._next_page_url(current_url)
        if next_url is None:
            self.logger.warning(
                "Cannot determine next page from url: %s", current_url
            )
            return

        self.logger.info("下一页url: %s", next_url)
        yield response.follow(next_url, self.parse)

    @staticmethod
    def _next_page_url(current_url):
        """Return ``current_url`` with its ``p`` query parameter incremented.

        The query string is parsed rather than split on the text ``p=``, so
        the result is correct regardless of where ``p`` appears or which
        other parameters accompany it. A URL without a ``p`` parameter is
        treated as page 1.

        Args:
            current_url: URL of the page that was just parsed.

        Returns:
            The URL of the following page, or ``None`` when ``p`` is present
            but is not an integer.
        """
        parts = urlsplit(current_url)

        page = 1
        other_params = []
        for key, value in parse_qsl(parts.query, keep_blank_values=True):
            if key != "p":
                other_params.append((key, value))
                continue
            try:
                page = int(value)
            except ValueError:
                return None

        other_params.append(("p", str(page + 1)))
        return urlunsplit(parts._replace(query=urlencode(other_params)))
